﻿using System;
using System.Text.RegularExpressions;

namespace StarHelper
{
    /// <summary>
    /// Helpers for stripping, rewriting and inspecting HTML fragments held in strings.
    /// </summary>
    /// <remarks>
    /// Every method here is regex- or character-based; none of them parses HTML.
    /// They are suitable for building previews and light clean-ups, not for
    /// sanitising untrusted markup.
    /// </remarks>
    public static class HtmlHelper
    {
        // Full-width (SBC) -> half-width (DBC) conversion constants.
        private const int FullWidthSpace = 12288;   // U+3000 IDEOGRAPHIC SPACE
        private const int FullWidthFirst = 65281;   // U+FF01
        private const int FullWidthLast = 65374;    // U+FF5E
        private const int FullWidthOffset = 65248;  // distance between U+FF01 and U+0021

        private static readonly Regex AnyTagRegex = new Regex("<[^>]+>");

        // Only genuine entity syntax (&name; &#123; &#x1F;), so that ordinary text such as
        // "AT&T rocks; yes" is not swallowed between an ampersand and a later semicolon.
        private static readonly Regex EntityRegex =
            new Regex("&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);");

        private static readonly Regex LineBreakRegex = new Regex("\\r?\\n");

        private static readonly Regex WhitespaceRegex = new Regex(@"\s+");

        private static readonly Regex ChineseRegex = new Regex(@"[\u4e00-\u9fa5]");

        private static readonly Regex AnchorOpenRegex = new Regex(@"<a\b[^>]*>", RegexOptions.IgnoreCase);

        private static readonly Regex AnchorCloseRegex = new Regex(@"</a\s*>", RegexOptions.IgnoreCase);

        // A style attribute whose value is double-quoted, single-quoted or unquoted.
        // Each quoting form is matched separately so a quote of the other kind inside
        // the value (font-family:'Arial') does not end the match early.
        private static readonly Regex InlineStyleRegex =
            new Regex(@"\sstyle\s*=\s*(?:""[^""]*""|'[^']*'|[^\s>""']*)", RegexOptions.IgnoreCase);

        private static readonly Regex SrcImgRegex =
            new Regex(BuildImgPattern("src"), RegexOptions.IgnoreCase);

        // Patterns removed by ClearHtml when all == true, applied strictly in this order.
        // Repeated entries are kept as they were: a second pass can remove a tag that
        // only becomes visible after an earlier removal.
        private static readonly string[] FullCleanPatterns =
        {
            "&#[^>;]*;",
            "</?marquee[^>]*>",
            "</?object[^>]*>",
            "</?param[^>]*>",
            "</?embed[^>]*>",
            "</?table[^>]*>",
            "&nbsp;|[ \\u00A0]",
            "</?tr[^>]*>",
            "</?th[^>]*>",
            "</?p[^>]*>",
            "</?tbody[^>]*>",
            "</?li[^>]*>",
            "</?span[^>]*>",
            "</?div[^>]*>",
            "</?th[^>]*>",
            "</?td[^>]*>",
            "on(mouse|exit|error|click|key)",
            "<\\?xml[^>]*>",
            "<\\/?[a-z]+:[^>]*>",
            "</?font[^>]*>",
            "</?u[^>]*>",
            "</?i[^>]*>",
            "</?strong[^>]*>",
            "</?strong[^>]*>",
            "&nbsp;|[ \\u00A0]",
        };

        /// <summary>
        /// Strips tags and character entities from <paramref name="html"/> and optionally
        /// truncates the remaining text.
        /// </summary>
        /// <param name="html">HTML fragment to convert to plain text.</param>
        /// <param name="length">Maximum number of characters to keep; 0 or less means no limit.</param>
        /// <param name="suffix">Text appended only when the result was truncated (for example "...").</param>
        /// <returns>
        /// The plain text, or <c>null</c> when <paramref name="html"/> is null, empty or whitespace.
        /// </returns>
        public static string ReplaceHtmlTag(this string html, int length = 0, string suffix = "")
        {
            if (string.IsNullOrWhiteSpace(html)) return null;

            string text = AnyTagRegex.Replace(html, string.Empty);
            text = EntityRegex.Replace(text, string.Empty);
            text = text.Replace("\t", string.Empty);
            // Treat CRLF as a single line break so no stray '\r' is left in the output.
            text = LineBreakRegex.Replace(text, " ");

            if (length > 0 && text.Length > length)
            {
                int cut = length;
                // Do not cut between the two halves of a surrogate pair.
                if (char.IsHighSurrogate(text[cut - 1])) cut--;
                return text.Substring(0, cut) + suffix;
            }

            return text;
        }

        /// <summary>
        /// Converts full-width (SBC) characters to their half-width (DBC) equivalents.
        /// </summary>
        /// <remarks>
        /// The full-width space (12288) becomes the ASCII space (32). Characters in the
        /// full-width range 65281-65374 map to 33-126 by subtracting 65248.
        /// </remarks>
        /// <param name="input">Any string; may be null.</param>
        /// <returns>The converted string; null or empty input is returned unchanged.</returns>
        public static string ToDBC(this string input)
        {
            if (string.IsNullOrEmpty(input)) return input;

            char[] chars = input.ToCharArray();
            for (int i = 0; i < chars.Length; i++)
            {
                char ch = chars[i];
                if (ch == FullWidthSpace)
                {
                    chars[i] = ' ';
                }
                else if (ch >= FullWidthFirst && ch <= FullWidthLast)
                {
                    chars[i] = (char)(ch - FullWidthOffset);
                }
            }
            return new string(chars);
        }

        /// <summary>
        /// Rewrites a fragment for MIP pages: every &lt;img&gt; becomes
        /// <c>&lt;mip-img src="..."&gt;&lt;/mip-img&gt;</c> and inline <c>style</c> attributes are removed.
        /// </summary>
        /// <param name="html">HTML fragment to rewrite.</param>
        /// <returns>The rewritten fragment, or an empty string for null/blank input.</returns>
        public static string ToMipHtml(this string html)
        {
            if (string.IsNullOrWhiteSpace(html)) return "";

            // Only the src value survives; all other <img> attributes are dropped.
            string withMipImages = SrcImgRegex.Replace(html, "<mip-img src=\"${imgUrl}\"></mip-img>");
            return InlineStyleRegex.Replace(withMipImages, string.Empty);
        }

        /// <summary>
        /// Collects the URL of every &lt;img&gt; tag in <paramref name="htmlText"/>.
        /// </summary>
        /// <param name="htmlText">HTML text to scan.</param>
        /// <param name="src">
        /// Name of the attribute holding the URL (for example "data-src" for lazy-loaded images).
        /// It is matched literally; blank falls back to "src".
        /// </param>
        /// <returns>
        /// One entry per matched tag, in document order. The array is empty when there is
        /// no input or no match.
        /// </returns>
        public static string[] GetHtmlImageUrlList(string htmlText, string src = "src")
        {
            if (string.IsNullOrEmpty(htmlText)) return new string[0];
            if (string.IsNullOrWhiteSpace(src)) src = "src";

            Regex imgRegex = new Regex(BuildImgPattern(src), RegexOptions.IgnoreCase);
            MatchCollection matches = imgRegex.Matches(htmlText);

            string[] urls = new string[matches.Count];
            for (int i = 0; i < urls.Length; i++)
            {
                urls[i] = matches[i].Groups["imgUrl"].Value;
            }
            return urls;
        }

        /// <summary>
        /// Downloads every http(s) image referenced in <paramref name="htmlText"/> and inlines it
        /// as a base64 data URI.
        /// </summary>
        /// <remarks>
        /// This is best-effort: an image that cannot be downloaded keeps its original URL.
        /// The data URI is always labelled <c>image/png</c>, whatever the real format.
        /// Downloads are synchronous.
        /// </remarks>
        /// <param name="htmlText">HTML text to process.</param>
        /// <param name="src">
        /// Name of the attribute holding the URL. When it is not "src", occurrences of
        /// <c>{src}="</c> are renamed to <c>src="</c> once at least one image has been inlined.
        /// </param>
        /// <returns>The processed HTML, or an empty string for null/blank input.</returns>
        public static string ReplaceImageToBase64(string htmlText, string src = "src")
        {
            if (string.IsNullOrWhiteSpace(htmlText)) return "";
            if (string.IsNullOrWhiteSpace(src)) src = "src";

            // Case-insensitive, to agree with the case-insensitive tag regex.
            if (htmlText.IndexOf("<img", StringComparison.OrdinalIgnoreCase) < 0) return htmlText;

            string[] urls = GetHtmlImageUrlList(htmlText, src);
            if (urls.Length == 0) return htmlText;

            var attempted = new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);
            bool attributeRenamed = false;

            using (var client = new System.Net.WebClient())
            {
                foreach (string url in urls)
                {
                    if (string.IsNullOrWhiteSpace(url)) continue;
                    if (!url.StartsWith("http", StringComparison.OrdinalIgnoreCase)) continue;
                    // The string replace below rewrites every occurrence at once, so a
                    // URL that repeats in the document only needs one download.
                    if (!attempted.Add(url)) continue;

                    try
                    {
                        byte[] bytes = client.DownloadData(url);
                        string base64 = Convert.ToBase64String(bytes);

                        if (!attributeRenamed && src != "src")
                        {
                            htmlText = htmlText.Replace(src + "=\"", "src=\"");
                            attributeRenamed = true;
                        }
                        htmlText = htmlText.Replace(url, "data:image/png;base64," + base64);
                    }
                    catch (Exception)
                    {
                        // Deliberate best-effort: an unreachable or invalid image is left
                        // pointing at its original URL rather than failing the whole call.
                    }
                }
            }

            return htmlText;
        }

        /// <summary>
        /// Tells whether the string contains at least one character in U+4E00..U+9FA5.
        /// </summary>
        /// <param name="str">String to test; may be null.</param>
        /// <returns><c>true</c> when such a character is present; <c>false</c> otherwise, including for null.</returns>
        public static bool HasChinese(this string str)
        {
            return !string.IsNullOrEmpty(str) && ChineseRegex.IsMatch(str);
        }

        /// <summary>
        /// Removes everything between '&lt;' and '&gt;' (inclusive) with a single character scan.
        /// </summary>
        /// <param name="html">HTML text; may be null.</param>
        /// <returns>The text outside of tags, or an empty string for null/empty input.</returns>
        public static string RemoveHtmlTags(string html)
        {
            if (string.IsNullOrEmpty(html)) return string.Empty;

            char[] buffer = new char[html.Length];
            int written = 0;
            bool insideTag = false;

            foreach (char ch in html)
            {
                if (ch == '<')
                {
                    insideTag = true;
                }
                else if (ch == '>')
                {
                    insideTag = false;
                }
                else if (!insideTag)
                {
                    buffer[written++] = ch;
                }
            }
            return new string(buffer, 0, written);
        }

        /// <summary>
        /// Removes &lt;a ...&gt; and &lt;/a&gt; tags while keeping the link text.
        /// </summary>
        /// <remarks>
        /// Only the <c>a</c> element is matched; other elements whose name starts with "a"
        /// (abbr, article, audio, ...) are left intact.
        /// </remarks>
        /// <param name="str">HTML text; may be null.</param>
        /// <returns>The text without anchor tags, or an empty string for null/empty input.</returns>
        public static string RemoveHtmlA(string str)
        {
            if (string.IsNullOrEmpty(str)) return string.Empty;

            str = AnchorOpenRegex.Replace(str, "");
            return AnchorCloseRegex.Replace(str, "");
        }

        /// <summary>
        /// Removes a fixed list of HTML tags from <paramref name="content"/> and then strips
        /// ALL whitespace from the result.
        /// </summary>
        /// <remarks>
        /// Tag patterns match by prefix, so for example the <c>b</c> pattern also removes
        /// &lt;br&gt; and &lt;body&gt;. Style and class tags are replaced by the literal texts
        /// "stylex" / "classx" rather than removed. Both behaviours are preserved as they were.
        /// </remarks>
        /// <param name="content">HTML text; null is treated as empty.</param>
        /// <param name="all">
        /// When <c>true</c>, additionally removes numeric entities, <c>&amp;nbsp;</c>, table/layout/
        /// formatting tags, namespaced tags and common <c>on*</c> event-handler prefixes.
        /// </param>
        /// <returns>The cleaned text with no whitespace.</returns>
        public static string ClearHtml(string content, bool all = true)
        {
            content = ClearReplaceHtml("</?style[^>]*>", "stylex", content);
            content = ClearReplaceHtml("</?class[^>]*>", "classx", content);
            content = ClearReplaceHtml("</?a[^>]*>", "", content);
            content = ClearReplaceHtml("</?img[^>]*>", "", content);
            content = ClearReplaceHtml("</?image[^>]*>", "", content);
            content = ClearReplaceHtml("</?b[^>]*>", "", content);

            if (all)
            {
                foreach (string pattern in FullCleanPatterns)
                {
                    content = ClearReplaceHtml(pattern, "", content);
                }
            }

            // Every whitespace run is removed, so no trimming is needed afterwards.
            return WhitespaceRegex.Replace(content, "");
        }

        /// <summary>
        /// Replaces every case-insensitive match of a regular expression in the content.
        /// </summary>
        /// <param name="patrn">Regular expression describing what to replace.</param>
        /// <param name="strRep">Replacement text.</param>
        /// <param name="content">Text to process; null is treated as empty.</param>
        /// <returns>The text after replacement.</returns>
        private static string ClearReplaceHtml(string patrn, string strRep, string content)
        {
            return Regex.Replace(content ?? string.Empty, patrn, strRep, RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Builds the &lt;img&gt; matching pattern that captures the value of
        /// <paramref name="attributeName"/> in the named group <c>imgUrl</c>.
        /// </summary>
        /// <param name="attributeName">Attribute name, matched literally (regex metacharacters are escaped).</param>
        /// <returns>The regular expression pattern text.</returns>
        private static string BuildImgPattern(string attributeName)
        {
            return @"<img\b[^<>]*?\b" + Regex.Escape(attributeName)
                 + @"\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>";
        }
    }
}
